from pyspark import SparkContext,SparkConf
import os
# Point PySpark worker processes at the Python interpreter to use.
# NOTE(review): this is a machine-specific absolute path — adjust per host.
os.environ['PYSPARK_PYTHON']="D:/python/python.exe"

# Run Spark locally using all available cores, under the app name "test_spark".
conf=SparkConf().setMaster("local[*]").setAppName("test_spark")
sc=SparkContext(conf=conf)

try:
    # Prepare an RDD containing some duplicate values.
    rdd=sc.parallelize([1,1,2,3,4,4,5,6,6,7,88])

    # Remove duplicate elements from the RDD.
    rdd2=rdd.distinct()

    # collect() pulls the results back to the driver as a Python list.
    # The order of the de-duplicated elements is not guaranteed.
    print(rdd2.collect())
finally:
    # Always release the SparkContext, even if an action above fails, so the
    # underlying Spark resources are shut down cleanly.
    sc.stop()